# Data Visualization Project 02
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 4.0.3
## -- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(maps)
## Warning: package 'maps' was built under R version 4.0.3
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(GGally)
## Warning: package 'GGally' was built under R version 4.0.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Thirteen hex colours handed to the manual colour scales of the plots.
cb_palette <- c(
  "#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288",
  "#AA4499", "#44AA99", "#999933", "#882255", "#661100",
  "#6699CC", "#888888", "#000000"
)

# Marathon results for 2017; the path is relative to the working directory.
marathon_2017 <- read.csv(file = "../data/marathon_results_2017.csv")
# Interactive scatter plot of age against position for the men whose `Gender`
# value is at most 100, one hollow point per runner, coloured by country.
# `Gender` is the column plotted as "Position"; `M.F` carries the gender code.
#
# The former `alpha = 1` argument to ggplot() was removed: ggplot() does not
# use extra arguments, so it never had any effect on the points.
ggplotly(
  marathon_2017 %>%
    filter(Gender <= 100, M.F == "M") %>%
    ggplot(aes(x = Gender, y = Age)) +
    geom_point(aes(color = Country), shape = 21) +
    # cb_palette has 13 entries, so at most 13 distinct countries can be
    # coloured before the manual scale runs out of values.
    scale_color_manual(values = cb_palette) +
    theme_classic()
) %>%
  # Single layout() call: horizontal legend below the plot, title, axis label.
  layout(
    legend = list(orientation = "h", y = -0.3),
    title = "Age and Position on Male Marathon 2017",
    xaxis = list(title = "Position")
  )
# Interactive scatter plot of age against position for the women whose
# `Gender` value is at most 100, one hollow point per runner, coloured by
# country. `Gender` is the column plotted as "Position"; `M.F` carries the
# gender code.
#
# The former `alpha = 1` argument to ggplot() was removed: ggplot() does not
# use extra arguments, so it never had any effect on the points.
ggplotly(
  marathon_2017 %>%
    filter(Gender <= 100, M.F == "F") %>%
    ggplot(aes(x = Gender, y = Age)) +
    geom_point(aes(color = Country), shape = 21) +
    # cb_palette has 13 entries, so at most 13 distinct countries can be
    # coloured before the manual scale runs out of values.
    scale_color_manual(values = cb_palette) +
    theme_classic()
) %>%
  # Single layout() call: horizontal legend below the plot, title, axis label.
  layout(
    legend = list(orientation = "h", y = -0.3),
    title = "Age and Position on Female Marathon 2017",
    xaxis = list(title = "Position")
  )
# Pace against position for the runners whose `Gender` value is at most 100,
# drawn as one plotly trace per gender.
#
# The `Gender` column is split into one column per `M.F` value
# (`position_M`, `position_F`) so each trace has its own x values; rows of the
# other gender are NA in that column. pivot_wider() replaces the superseded
# spread(), and the prefix avoids a column literally named `F`.
# NOTE(review): `Pace` is plotted exactly as read from the CSV; if it is stored
# as text (e.g. h:mm:ss) plotly will treat the y axis as categorical - confirm.
marathon_2017 %>%
  filter(Gender <= 100) %>%
  pivot_wider(
    names_from = M.F,
    values_from = Gender,
    names_prefix = "position_"
  ) %>%
  plot_ly() %>%
  add_trace(
    type = "scatter",
    mode = "lines+markers",
    x = ~position_M,
    y = ~Pace,
    text = ~Name,
    name = "Male"
  ) %>%
  add_trace(
    type = "scatter",
    mode = "lines+markers",
    x = ~position_F,
    y = ~Pace,
    text = ~Name,
    name = "Female"
  ) %>%
  layout(
    title = "Pace comparison between male and female first 100 positions on marathon 2017",
    xaxis = list(title = "Position")
  )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Pairwise plot matrix (GGally) of the Overall, Gender and Age columns.
ggpairs(
  select(marathon_2017, Overall, Gender, Age)
)

# Country reference table; the code below reads its `alpha3` and `name`
# columns. The path is relative to the working directory.
countries <- read.csv("../data/countries.csv")

# World map polygons without Antarctica. map_data() already returns a data
# frame, so the former fortify() step (a no-op on data frames) was dropped.
WorldData <- map_data("world") %>%
  filter(region != "Antarctica")

# One-row lookup table: one column per upper-cased alpha3 code, holding the
# country name. Built directly from the two columns it needs instead of the
# earlier spread -> positional column drop -> gather -> spread round trip,
# which silently depended on `countries` having exactly two other columns.
spread_countries <- countries %>%
  as_tibble() %>%
  transmute(code = toupper(alpha3), name = as.character(name)) %>%
  filter(!is.na(name)) %>%
  # Alphabetical column order, matching what spread() used to produce.
  arrange(code) %>%
  pivot_wider(names_from = code, values_from = name)
spread_countries
## # A tibble: 1 x 193
## AFG AGO ALB AND ARE ARG ARM ATG AUS AUT AZE BDI BEL
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Afgha~ Ango~ Alba~ Ando~ Unit~ Arge~ Arme~ Anti~ Aust~ Aust~ Azer~ Buru~ Belg~
## # ... with 180 more variables: BEN <chr>, BFA <chr>, BGD <chr>, BGR <chr>,
## # BHR <chr>, BHS <chr>, BIH <chr>, BLR <chr>, BLZ <chr>, BOL <chr>,
## # BRA <chr>, BRB <chr>, BRN <chr>, BTN <chr>, BWA <chr>, CAF <chr>,
## # CAN <chr>, CHE <chr>, CHL <chr>, CHN <chr>, CIV <chr>, CMR <chr>,
## # COD <chr>, COG <chr>, COL <chr>, COM <chr>, CPV <chr>, CRI <chr>,
## # CUB <chr>, CYP <chr>, CZE <chr>, DEU <chr>, DJI <chr>, DMA <chr>,
## # DNK <chr>, DOM <chr>, DZA <chr>, ECU <chr>, EGY <chr>, ERI <chr>,
## # ESP <chr>, EST <chr>, ETH <chr>, FIN <chr>, FJI <chr>, FRA <chr>,
## # FSM <chr>, GAB <chr>, GBR <chr>, GEO <chr>, GHA <chr>, GIN <chr>,
## # GMB <chr>, GNB <chr>, GNQ <chr>, GRC <chr>, GRD <chr>, GTM <chr>,
## # GUY <chr>, HND <chr>, HRV <chr>, HTI <chr>, HUN <chr>, IDN <chr>,
## # IND <chr>, IRL <chr>, IRN <chr>, IRQ <chr>, ISL <chr>, ISR <chr>,
## # ITA <chr>, JAM <chr>, JOR <chr>, JPN <chr>, KAZ <chr>, KEN <chr>,
## # KGZ <chr>, KHM <chr>, KIR <chr>, KNA <chr>, KOR <chr>, KWT <chr>,
## # LAO <chr>, LBN <chr>, LBR <chr>, LBY <chr>, LCA <chr>, LIE <chr>,
## # LKA <chr>, LSO <chr>, LTU <chr>, LUX <chr>, LVA <chr>, MAR <chr>,
## # MCO <chr>, MDA <chr>, MDG <chr>, MDV <chr>, MEX <chr>, MHL <chr>, ...
# Choropleth of the number of athletes per country.
#
# The data layer used to map `fill = Gender` and `map_id = City`, which
# neither counts athletes nor matches the country names that identify the map
# regions. Athletes are now counted per country code and joined to the country
# name used as map_id.
# NOTE(review): assumes `marathon_2017$Country` holds three-letter codes
# comparable to `countries$alpha3`, and that `countries$name` is spelled like
# the `region` values of map_data("world") - confirm. Countries that do not
# match simply keep the white base fill.
athletes_by_country <- marathon_2017 %>%
  count(Country = toupper(as.character(Country)), name = "athletes") %>%
  inner_join(
    countries %>%
      transmute(Country = toupper(alpha3), region = as.character(name)),
    by = "Country"
  )

ggplot() +
  # Base layer: every region outlined on a white fill. x/y are not geom_map
  # aesthetics (hence the "unknown aesthetics" warning) but are kept so the
  # position scales are trained on the map extent.
  geom_map(
    data = WorldData, map = WorldData,
    aes(x = long, y = lat, group = group, map_id = region),
    fill = "white", colour = "#7f7f7f", size = 0.5
  ) +
  # Data layer: one row per country, filled by its athlete count.
  geom_map(
    data = athletes_by_country, map = WorldData,
    aes(fill = athletes, map_id = region),
    colour = "#7f7f7f", size = 0.5
  ) +
  coord_map("rectangular", lat0 = 0, xlim = c(-180, 180), ylim = c(-60, 90)) +
  theme_bw() +
  ggtitle("Athletes by country") +
  labs(fill = "Number of athletes", x = "", y = "")
## Warning: Ignoring unknown aesthetics: x, y
